{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "09007510",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "3f7547a1beb14d89807d54b4a1f86232",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "'/home/mesolitica/stt/Malaysian-UltraChat-Speech-Multiturn-Instructions'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from huggingface_hub import snapshot_download\n",
    "\n",
    "snapshot_download(\n",
    "    repo_id=\"mesolitica/Malaysian-UltraChat-Speech-Multiturn-Instructions\",\n",
    "    repo_type='dataset',\n",
    "    allow_patterns=\"data/*.parquet\",\n",
    "    local_dir=\"./Malaysian-UltraChat-Speech-Multiturn-Instructions\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "2fda8a33",
   "metadata": {},
   "outputs": [],
   "source": [
    "# snapshot_download(\n",
    "#     repo_id=\"mesolitica/Malaysian-UltraChat-Speech-Multiturn-Instructions\",\n",
    "#     repo_type='dataset',\n",
    "#     allow_patterns=\"ultrachat-speech-*.zip\",\n",
    "#     ignore_patterns=[\"*alignment.zip\"],\n",
    "#     local_dir=\"./\",\n",
    "# )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "f9983988",
   "metadata": {},
   "outputs": [],
   "source": [
    "from glob import glob\n",
    "from tqdm import tqdm\n",
    "from multiprocess import Pool\n",
    "import itertools\n",
    "import zipfile\n",
    "import os\n",
    "\n",
    "def chunks(l, n):\n",
    "    for i in range(0, len(l), n):\n",
    "        yield (l[i: i + n], i // n)\n",
    "\n",
    "\n",
    "def multiprocessing(strings, function, cores=6, returned=True):\n",
    "    df_split = chunks(strings, len(strings) // cores)\n",
    "    pool = Pool(cores)\n",
    "    pooled = pool.map(function, df_split)\n",
    "    pool.close()\n",
    "    pool.join()\n",
    "\n",
    "    if returned:\n",
    "        return list(itertools.chain(*pooled))\n",
    "\n",
    "def loop(files):\n",
    "    files, _ = files\n",
    "    for zip_file_path in tqdm(files):\n",
    "        destination_folder = './'\n",
    "        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:\n",
    "            zip_ref.extractall(destination_folder)\n",
    "        os.remove(zip_file_path)\n",
    "\n",
    "files = glob('ultrachat-speech*.zip')\n",
    "if len(files):\n",
    "    multiprocessing(files, loop, cores = min(len(files), 20), returned = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "d90f0c56",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.26.4\n",
      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
     ]
    }
   ],
   "source": [
    "from glob import glob\n",
    "import pandas as pd\n",
    "import os\n",
    "import json\n",
    "from transformers import AutoProcessor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "7f697a3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "processor = AutoProcessor.from_pretrained(\"Qwen/Qwen2-Audio-7B-Instruct\")\n",
    "tokenizer = processor.tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "52d088dd",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train-00000-of-00002.parquet  voice_assistant-00000-of-00001.parquet\r\n",
      "train-00001-of-00002.parquet\r\n"
     ]
    }
   ],
   "source": [
    "!ls Malaysian-UltraChat-Speech-Multiturn-Instructions/data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2158be18",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "192821"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = []\n",
    "for f in glob('Malaysian-UltraChat-Speech-Multiturn-Instructions/data/*.parquet'):\n",
    "    data.extend(pd.read_parquet(f).to_dict(orient = 'records'))\n",
    "    \n",
    "len(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "59efdb72",
   "metadata": {},
   "outputs": [],
   "source": [
    "conversation = json.loads(data[0]['conversation'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "b9a73849",
   "metadata": {},
   "outputs": [],
   "source": [
    "def loop(rows):\n",
    "    rows, _ = rows\n",
    "    data = []\n",
    "    for row in tqdm(rows):\n",
    "        try:\n",
    "            conversation = json.loads(row['conversation'])\n",
    "            text = processor.apply_chat_template(conversation, tokenize=False)\n",
    "            audio = []\n",
    "            for c in conversation:\n",
    "                if c['role'] == 'user':\n",
    "                    for c_ in c['content']:\n",
    "                        if c_['type'] == 'audio':\n",
    "                            audio.append(c_['audio_url'])\n",
    "        except Exception as e:\n",
    "            continue\n",
    "\n",
    "        data.append({\n",
    "            'text': text,\n",
    "            'audio': audio,\n",
    "        })\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "ff191eb0",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|███████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 3488.89it/s]\n"
     ]
    }
   ],
   "source": [
    "processed = loop((data[-100:], 0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "50fc2131",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "100"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(processed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "fba7dfc2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<|im_start|>system\n",
      "You are a voice-based assistant designed to be brief and precise. All replies must be under 300 characters. Speak in a friendly, natural tone suitable for spoken output. Avoid technical jargon and long replies. Ask short questions if more info is needed.<|im_end|>\n",
      "<|im_start|>user\n",
      "learners. The advantage \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "International Journal Languages and Education (Vol. 1, No 1)                                   \n",
      "\n",
      "\n",
      "\n",
      " \n",
      "89 \n",
      "\n",
      "\n",
      "\n",
      "©The Author(s) (2021). Published by USIM Press on behalf of the Universiti Sains Islam Malaysia.  This is an Open \n",
      "\n",
      "\n",
      "\n",
      "Access article  distributed  under the  terms of the Creative Commons Attribution 4.0 International (CC BY 4.0) license. \n",
      "\n",
      "\n",
      "\n",
      "as per the analysis is concerned with the improvement in engagement of learners. For instance, \n",
      "\n",
      "\n",
      "\n",
      "when the flipped classroom method is considered and used, it plays a critical role in the estab-\n",
      "\n",
      "\n",
      "\n",
      "lishment or development of an environment in which learners can engage with each other. In \n",
      "\n",
      "\n",
      "\n",
      "fact, in the classrooms, students are offered an environment in which they can not only perform \n",
      "\n",
      "\n",
      "\n",
      "different activities but also engage with each other. \n",
      "\n",
      "\n",
      "\n",
      "Through this engagement, students are able to practice their language speaking skills and \n",
      "\n",
      "\n",
      "\n",
      "even language learning better. In this environment, students are enabled to not only converse \n",
      "\n",
      "\n",
      "\n",
      "with each other but also practice the language that they are learning with each other. They can \n",
      "\n",
      "\n",
      "\n",
      "engage with each other and they can even consider role-play. In this manner, it becomes pos-\n",
      "\n",
      "\n",
      "\n",
      "sible for students to make sure that they are able to practice their skills and their language \n",
      "\n",
      "\n",
      "\n",
      "according to the situation to which they are assigned. Another important benefit associated \n",
      "\n",
      "\n",
      "\n",
      "with the technique has been improvement in speaking skills. It has been identified in the anal-\n",
      "\n",
      "\n",
      "\n",
      "ysis that with the use of the flipped classroom method, it becomes possible to improve the \n",
      "\n",
      "\n",
      "\n",
      "speaking skills of learners. \n",
      "\n",
      "\n",
      "\n",
      "As it has been determined above, this method enables educators to provide students with an \n",
      "\n",
      "\n",
      "\n",
      "environment in which it is possible for learners to interact with each other. When they are \n",
      "\n",
      "\n",
      "\n",
      "enabled to interact with each other, they are able to practice their speaking skills better. \n",
      "\n",
      "\n",
      "\n",
      "The review has indicated that the use of flipped classrooms in teaching the Arabic language \n",
      "\n",
      "\n",
      "\n",
      "encourages participation. Alabahuoth (2020) determined that the method is capable of encour-\n",
      "\n",
      "\n",
      "\n",
      "aging and even promoting the participation of students and learners in the classroom. When a \n",
      "\n",
      "\n",
      "\n",
      "flipped classroom method is used, it helps in offering different exercises and activities. It is \n",
      "\n",
      "\n",
      "\n",
      "possible for students and learners to participate in these activities and improve their learning to \n",
      "\n",
      "\n",
      "\n",
      "a significant extent. When this method is used, the educators encourages students to take a part \n",
      "\n",
      "\n",
      "\n",
      "in the exercises and make sure that they achieve the desired results. \n",
      "\n",
      "\n",
      "\n",
      "When the flipped classroom method is used and implemented, it involves the use of different \n",
      "\n",
      "\n",
      "\n",
      "activities and even exercises in the classroom. It plays a critical role in providing students and \n",
      "\n",
      "\n",
      "\n",
      "learners with hands-on experience. They are able to not only practice writing the language but \n",
      "\n",
      "\n",
      "\n",
      "they are also able to learn new words and grammar. When they frequently engage in such \n",
      "\n",
      "\n",
      "\n",
      "activities, it serves to improve not only the writing and grammatical skills of learners and stu-\n",
      "\n",
      "\n",
      "\n",
      "dents, it also encourages students to improve their vocabulary as they learn from the activities \n",
      "\n",
      "\n",
      "\n",
      "and exercises. \n",
      "\n",
      "\n",
      "\n",
      "It is, however, important to note that some challenges with the use of the flipped classroom \n",
      "\n",
      "\n",
      "\n",
      "method have also been revealed by the review. Rumzan (2020) indicates that a major challenge \n",
      "\n",
      "\n",
      "\n",
      "with the use of the flipped classroom method is the unfamiliarity of both educators and learners \n",
      "\n",
      "\n",
      "\n",
      "in the context of the Arabic language. Not many instructors and students are aware of the tech-\n",
      "\n",
      "\n",
      "\n",
      "nique and how it works. Similarly, another major issue associated with the technique is differ-\n",
      "\n",
      "\n",
      "\n",
      "ent technological issues as identified by Islieh et al. (2022).  \n",
      "\n",
      "\n",
      "\n",
      "The flipped classroom method involves the use of different technologies. These \n",
      "\n",
      "\n",
      "\n",
      "technologies have different complexities and issues that need to be addressed. Rumzan (2020) \n",
      "\n",
      "\n",
      "\n",
      "even determines that a challenge is the lack of readiness of instructors to accept the method. \n",
      "\n",
      "\n",
      "\n",
      "Since not many instructors are aware of the technique and how it can benefit the learning of \n",
      "\n",
      "\n",
      "\n",
      "students, they often exhibit and show resistance to the adoption of the method. There is a crit-\n",
      "\n",
      "\n",
      "\n",
      "ical need to address these challenges or else, it would not be possible to implement it in teaching \n",
      "\n",
      "\n",
      "\n",
      "the Arabic language to students and learners. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      " \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "International Journal Languages and Education (Vol. 1, No 1)                                   \n",
      "\n",
      "\n",
      "\n",
      " \n",
      "90 \n",
      "\n",
      "\n",
      "\n",
      "©The Author(s) (2021). Published by USIM Press on behalf of the Universiti Sains Islam Malaysia.  This is an Open \n",
      "\n",
      "\n",
      "\n",
      "Access article  distributed  under the  terms of the Creative Commons Attribution 4.0 International (CC BY 4.0) license. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "CONCLUSION \n",
      " \n",
      "\n",
      "\n",
      "\n",
      "The flipped classroom method is effective in facilitating the learning of students and teaching \n",
      "\n",
      "\n",
      "\n",
      "the Arabic language. This study is one of the few to have been carried out in the context of the \n",
      "\n",
      "\n",
      "\n",
      "Arabic language with a focus on the use of the flipped classroom technique. Through a system-\n",
      "\n",
      "\n",
      "\n",
      "atic review, it has been identified that the flipped classroom method has significant potential in \n",
      "\n",
      "\n",
      "\n",
      "benefitting both instructors and learners. However, at the same time, there are some challenges \n",
      "\n",
      "\n",
      "\n",
      "associated with the adoption of this method. These challenges must be addressed to further \n",
      "\n",
      "\n",
      "\n",
      "facilitate the use of the flipped classroom method. On the basis of the findings of this study, \n",
      "\n",
      "\n",
      "\n",
      "the following recommendations are offered: \n",
      "\n",
      "\n",
      "\n",
      "Recommendations for Research \n",
      "\n",
      "\n",
      "\n",
      "• There is a need to carry out further experimental studies on the impacts of the flipped \n",
      "\n",
      "\n",
      "\n",
      "classroom technique on Arabic language teaching. In addition, qualitative studies must \n",
      "\n",
      "\n",
      "\n",
      "be performed for offering insights into the use of the technique in classrooms. \n",
      "\n",
      "\n",
      "\n",
      "• Further experimental research needs to be carried out under different class standards.  \n",
      "\n",
      "\n",
      "\n",
      "• In the future, studies should specifically identify the effect of the flipped classroom \n",
      "\n",
      "\n",
      "\n",
      "technique on the vocabulary, reading, listening, and grammar knowledge of students \n",
      "\n",
      "\n",
      "\n",
      "and learners. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Recommendations for Practitioners \n",
      "\n",
      "\n",
      "\n",
      "• Educators need to comprehend how the flipped classroom method can be applied and \n",
      "\n",
      "\n",
      "\n",
      "used before they actually consider its application. Information should be gathered about \n",
      "\n",
      "\n",
      "\n",
      "it to facilitate their understanding and ensure that the desired results are achieved with-\n",
      "\n",
      "\n",
      "\n",
      "out experiencing major difficulties and issues. \n",
      "\n",
      "\n",
      "\n",
      "• Educators must focus on the maximization of benefits that are offered by the flipped \n",
      "\n",
      "\n",
      "\n",
      "classroom technique by considering the needs of learners before they engage in the \n",
      "\n",
      "\n",
      "\n",
      "development of their courses. \n",
      "\n",
      "\n",
      "\n",
      "• The use of the flipped classroom technique involves the utilization of different techno-\n",
      "\n",
      "\n",
      "\n",
      "logical tools and methods. Thus, it must be ensured that both the students and instruc-\n",
      "\n",
      "\n",
      "\n",
      "tors have access to the desired technologies, and they are also aware of how to use them \n",
      "\n",
      "\n",
      "\n",
      "effectively. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "REFERENCES \n",
      "Aburezeq, I. M. (2020). The impact of flipped classroom on developing Arabic speaking skills. The \n",
      "\n",
      "\n",
      "\n",
      "Asia-Pacific Education Researcher, 29(4), 295-306. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Albahuoth, H. (2020). Effectiveness of flipped classroom in developing 11th graders’ grammatical \n",
      "\n",
      "\n",
      "\n",
      "competences in Arabic. Interactive Learning Environments, 1-17. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Amiryousefi, M. (2019). The incorporation of flipped learning into conventional classes to enhance \n",
      "\n",
      "\n",
      "\n",
      "EFL learners’ L2 speaking, L2 listening, and engagement. Innovation in Language Learning and \n",
      "\n",
      "\n",
      "\n",
      "Teaching, 13(2), 147-161. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Boyraz, S., & Ocak, G. (2017). Implementation of flipped education into Turkish EFL teaching context. \n",
      "\n",
      "\n",
      "\n",
      "Journal of Language and Linguistic Studies, 13(2), 426-439. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Brown, H. D., & Lee, H. (2015). Teaching principles. P. Ed Australia. \n",
      "\n",
      "\n",
      "\n",
      " \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "International Journal Languages and Education (Vol. 1, No 1)                                   \n",
      "\n",
      "\n",
      "\n",
      " \n",
      "91 \n",
      "\n",
      "\n",
      "\n",
      "©The Author(s) (2021). Published by USIM Press on behalf of the Universiti Sains Islam Malaysia.  This is an Open \n",
      "\n",
      "\n",
      "\n",
      "Access article  distributed  under the  terms of the Creative Commons Attribution 4.0 International (CC BY 4.0) license. \n",
      "\n",
      "\n",
      "\n",
      "Clough, P., & Nutbrown, C. (2012). A Student′ s Guide to Methodology. Sage. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Dörnyei, Z., & Ushioda, E. (2013). Teaching and researching: Motivation. Routledge. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Hooks, B. (2014). Teaching to transgress. Routledge. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Hsieh, J. S., Wu, W. C. V., & Marek, M. W. (2016). Using the flipped classroom to enhance EFL \n",
      "\n",
      "\n",
      "\n",
      "learning. Computer Assisted Language Learning, 1-25. \n",
      "\n",
      "\n",
      "\n",
      "http://dx.doi.org/10.1080/09588221.2015.1111910 \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Islieh, A. R. I. S., Jailani, M. R. M., Zakaria, Z., Zainuddin, N., & Theis, N. A. (2022). Engaging Sec-\n",
      "\n",
      "\n",
      "\n",
      "ondary School Students in Learning Arabic Language through Flipped Classroom Using Creative \n",
      "\n",
      "\n",
      "\n",
      "MOOC Design. Journal of Language and Linguistic Studies, 17(4). \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Jwaifell, M., Abu-Omar, R., & Al-Tarawneh, M. (2018). The Readiness of Arabic Language Teachers \n",
      "\n",
      "\n",
      "\n",
      "for Integrating Flipped Classroom: Case of Ma'an. International Journal of Instruction, 11(4), 855-\n",
      "\n",
      "\n",
      "\n",
      "868. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Rumzan, I. (2020). Best Practices Using Flipped Classroom in Teaching a Second Language in Differ-\n",
      "\n",
      "\n",
      "\n",
      "ent Learning Environments. In Emerging Technologies and Pedagogies in the Curriculum (pp. 399-\n",
      "\n",
      "\n",
      "\n",
      "412). Springer, Singapore. \n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "\n",
      "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\n",
      "<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Pelaksanaan bilik darjah flipped dalam pengajaran bahasa Arab menghadapi cabaran seperti kurang pemahaman, kesedaran, dan isu teknologi. Penting untuk menyelesaikan cabaran ini supaya kaedah tersebut dapat digunakan dengan berkesan.<|im_end|>\n",
      "<|im_start|>user\n",
      "Audio 2: <|audio_bos|><|AUDIO|><|audio_eos|>\n",
      "<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Untuk melaksanakan kaedah bilik darjah flipped dalam pengajaran bahasa Arab, pendidik perlu mendapatkan latihan, merancang kursus dengan teliti, menyediakan sokongan teknologi, dan menggalakkan kerjasama. Dengan pendekatan yang betul, kaedah ini boleh meningkatkan pembelajaran pelajar.<|im_end|>\n",
      "<|im_start|>user\n",
      "Audio 3: <|audio_bos|><|AUDIO|><|audio_eos|>\n",
      "<|im_end|>\n",
      "<|im_start|>assistant\n",
      "Untuk mengatasi cabaran dalam kaedah bilik darjah terbalik, pastikan ada sumber berkualiti, sokongan teknikal, komunikasi berterusan, penglibatan pelajar, dan latihan pendidik. Ini akan menjadikan pembelajaran lebih berkesan dan menyeronokkan.<|im_end|>\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print(processed[-1]['text'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "f1a0ab3d",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['ultrachat-speech/195824.mp3',\n",
       " 'ultrachat-speech/195825.mp3',\n",
       " 'ultrachat-speech/195826.mp3']"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "processed[-1]['audio']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "01c023b5",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4159.67it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4542.31it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4509.73it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4493.10it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4531.37it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4250.47it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4519.10it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4305.02it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4396.52it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4399.96it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4520.91it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4538.75it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4531.00it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4031.98it/s]\n",
      "  0%|                                                                                              | 0/6427 [00:00<?, ?it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4027.84it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4519.11it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4524.22it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4453.72it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4540.69it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 3480.99it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4514.58it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4313.33it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4446.62it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4490.33it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4390.25it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 3518.98it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4476.68it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4406.34it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 4092.63it/s]\n",
      "100%|█████████████████████████████████████████████████████████████████████████████████| 6427/6427 [00:01<00:00, 3992.02it/s]\n"
     ]
    }
   ],
   "source": [
    "processed = multiprocessing(data, loop, cores = 30)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "e3618913",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "192821"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(processed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "46cca199",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('prepare-Malaysian-UltraChat-Speech-Multiturn-Instructions.json', 'w') as fopen:\n",
    "    json.dump(processed, fopen)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
